SETUP

suppressPackageStartupMessages({
  library(package = "knitr")
  library(package = "Biobase")
  library(package = "RColorBrewer")
  library(package = "tidyverse")
  library(package = "ggbeeswarm")
  library(package = "pvca") # dleelab/pvca
  library(package = "lme4")
  library(package = "cluster")
  library(package = "ComplexHeatmap") # Bioc Install
  library(package = "ImmuneSignatures2")
  library(package = "cowplot")
  library(here)
})

These figures only look at the “Young” cohort that is defined as 18 to 50 years old and the “Extended Old” cohort of 50 to 90 years old. The “Old” cohort (not used here) was 60 to 90 years old.

subsetToCompleteGenes <- function(eset) {
  # only include complete cases
  # TODO: Check in with IS2 folks about this... SHould I update the datsets?
  completeGenes <- complete.cases(exprs(eset))
  eset[completeGenes]
}

noRespGEFile <- file.path(dataCacheDir, paste0(datePrefix,"extendedOld_norm_noResponse_eset.rds"))
old.NoResp <- subsetToCompleteGenes(readRDS(file = noRespGEFile))


noRespGEFile <- file.path(dataCacheDir, paste0(datePrefix,"young_norm_noResponse_eset.rds"))
young.NoResp <- subsetToCompleteGenes(readRDS(file = noRespGEFile))

withRespGEFile <- file.path(dataCacheDir, paste0(datePrefix,"young_norm_withResponse_eset.rds"))
young.WithResp <- subsetToCompleteGenes(readRDS(file = withRespGEFile))

FIGURES

Figure 1

Figure 1a: Proportion of subjects of various self reported race and imputed y chrom values per pathogenvaccine type Authors:* Joanna Arce

Combine the data for a full dataset with all ages

getPdata <- function(eset){
  pd <- as_tibble(pData(eset))
  pd <- pd %>% dplyr::select(uid, everything())
  pd$age_reported <- as.numeric(pd$age_reported)
  return(pd)
}

pdata_young <- getPdata(young.NoResp)
pdata_old <- getPdata(old.NoResp)

pdata_df <- rbind(pdata_young, pdata_old)

Pre-processing

# Remove malaria - why?
pdata_no_malaria <- filter(pdata_df, study_accession != "SDY1293")

# Filter to unique subjects
pdata_df_uniqueSubjects <- distinct(pdata_no_malaria, 
                                    participant_id, 
                                    y_chrom_present, 
                                    race, 
                                    pathogen, 
                                    vaccine_type)

# Create new column for pathogen and vaccine
pdata_df_uniqueSubjects <- mutate(pdata_df_uniqueSubjects, 
                                  pathogen_vacc = paste0(pathogen, "\n(", vaccine_type, ")"))

getCounts <- function(pd, colnm, prefix){
  counts_df <- arrange(dplyr::count(pd, .data[[colnm]], pathogen_vacc), n)
  counts_df$pathogen_total <- sapply(1:nrow(counts_df), function(i){
    return(sum(filter(counts_df, pathogen_vacc == counts_df$pathogen_vacc[i])$n))
  })
  counts_df$pathogen_vacc <- paste0(counts_df$pathogen_vacc, "\nn = ", counts_df$pathogen_total)
  counts_df$n <- counts_df$n/counts_df$pathogen_total
  colnames(counts_df) <- c("category", "pathogen_vacc", "freq")
  counts_df$category <- paste0(prefix, counts_df$category)
  return(counts_df)
}

sex_counts_df <- getCounts(pdata_df_uniqueSubjects, "y_chrom_present", "YCHROM_")
race_counts_df <- getCounts(pdata_df_uniqueSubjects, "race", "RACE_")

total_counts <- rbind(sex_counts_df, race_counts_df)
names(colors.fig1a) <- c(unique(total_counts$pathogen_vacc)[c(2, 11, 3, 12, 8)],
                       "Malaria (Recombinant protein)",
                       unique(total_counts$pathogen_vacc)[c(7, 6, 5, 1, 4, 9, 10)])
p <- ggplot(total_counts) +
  geom_bar(aes(x = freq, y = category, fill = pathogen_vacc), 
           color = "black", stat = "identity", lwd = .2) +
  geom_hline(yintercept = 7.5, color = "grey", lwd = .4) +
  facet_wrap(vars(pathogen_vacc), nrow = 2) +
  scale_y_discrete(limits = rev(c("YCHROM_TRUE", 
                                  "YCHROM_FALSE",
                                  "RACE_White", 
                                  "RACE_Black or African American", 
                                  "RACE_Asian", 
                                  "RACE_American Indian or Alaska Native", 
                                  "RACE_Other", 
                                  "RACE_Not Specified", 
                                  "RACE_Unknown")),
                   labels = rev(c("Male", 
                                  "Female",
                                  "White", 
                                  "Black or African American", 
                                  "Asian", 
                                  "American Indian or Alaska Native", 
                                  "Other", 
                                  "Not Specified", 
                                  "Unknown"))) +
  scale_x_continuous(breaks = c(0, .2, .4, .6, .8, 1), 
                     labels = c(0, 20, 40, 60, 80, 100)) +
  scale_fill_manual(values = colors.fig1a) +
  guides(fill = FALSE) +
  xlab("Distribution (%)") +
  ylab(NULL) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))

p

Figure 1b: Plot number of samples as a function of day of sampling

Authors: Slim Fourati, Joanna Arce

# any sampling data after 20 days coded as >20
df.fig1b <- pData(young.NoResp) %>%
  select(uid, study_time_collected, pathogen, vaccine_type) %>%
  arrange(study_time_collected) %>%
  mutate(study_time_collected = signif(study_time_collected, digits = 3),
     study_time_collected.factor = ifelse(test = study_time_collected > 20,
                          yes  = ">20",
                          no   = study_time_collected),
     study_time_collected.factor =
       factor(study_time_collected.factor,
          levels = unique(study_time_collected.factor)),
     vaccine = paste0(pathogen, " (", vaccine_type, ")")) %>%
  group_by(study_time_collected.factor, vaccine) %>%
  summarize(n = n())

ggplot(data = df.fig1b,
       mapping = aes(x = study_time_collected.factor,
                         y = n)) +
  geom_bar(stat = "identity", mapping = aes(fill = vaccine)) +
  labs(x = "Days post-vaccination", y = "Number of samples") +
  scale_y_continuous(limits = c(0, 800),
                         breaks = seq(from = 0, to = 800, by = 100)) + 
  scale_fill_manual(name   = "Pathogen (Vaccine type)",
                        values = vaccine2color) +
  theme_bw() +
  theme(panel.grid         = element_blank(),
        panel.grid.major.y = element_line(color = "lightgrey"),
        axis.text          = element_text(color = "black"),
        axis.text.x        = element_text(angle = 45, hjust = 1),
        legend.pos         = "bottom",
        legend.text        = element_text(size = 7),
        legend.key.height  = unit(0.02, units = "npc"),
        legend.key.width   = unit(0.015, units = "npc")) +
  guides(fill = guide_legend(ncol = 2))

Figure 1c: Plot age as a function of vaccine investigated

Author: Slim Fourati

df.fig1c <- pData(young.NoResp) %>%
  select(participant_id, pathogen, vaccine_type, age_imputed, y_chrom_present) %>%
  distinct() %>%
  mutate(vaccine = paste0(pathogen, " (", vaccine_type, ")"))

# remove vaccine where all the participant have the same age
flag <- df.fig1c %>%
  group_by(vaccine) %>%
  summarize(nbUniqueAge = length(unique(age_imputed))) %>%
  filter(nbUniqueAge > 1)

df.fig1c <- filter(df.fig1c, vaccine %in% flag$vaccine)

ggplot(data = df.fig1c,
       mapping = aes(x = vaccine,
                         y = age_imputed)) +
  geom_boxplot(fill = "transparent", outlier.color = "transparent") +
  geom_beeswarm(mapping = aes(color = vaccine, shape = y_chrom_present),
                    cex = 0.3) +
  scale_color_manual(name   = "Pathogen (Vaccine type)",
                     values = vaccine2color) +
  labs(x = "Pathogen (Vaccine type)", y = "Age") + 
  theme_bw() +
  theme(panel.grid         = element_blank(),
        panel.grid.major.y = element_line(color = "lightgrey"),
        axis.text          = element_text(color = "black"),
        axis.text.x        = element_text(angle = 45, hjust = 1, size = 8),
        legend.text        = element_text(size = 7),
        legend.key.height  = unit(0.02, units = "npc"),
        legend.key.width   = unit(0.015, units = "npc")) +
  guides(fill = guide_legend(ncol = 1))

# test for difference between vaccine
kruskal.test(formula = age_imputed ~ vaccine, data = df.fig1c)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  age_imputed by vaccine
## Kruskal-Wallis chi-squared = 110.1, df = 7, p-value < 2.2e-16

Figure 1d: Proportion of Variance Explained

Author: Slim Fourati

em.fig1d <- exprs(young.NoResp)
pd.fig1d <- pData(young.NoResp) %>%
  mutate(Vaccine = paste0(pathogen, " (", vaccine_type, ")")) %>%
  select(uid, y_chrom_present, ethnicity, Vaccine, study_time_collected,
     age_imputed) %>%
  rename(
    Ychrom = y_chrom_present, 
    Ethnicity = ethnicity,
    Timepoint = study_time_collected,
    Age       = age_imputed) %>%
  column_to_rownames(var = "uid")
pd.fig1d <- pd.fig1d[colnames(em.fig1d), ]
df.fig1d <- data.frame(explained = as.vector(fit),
                       effect    = names(fit)) %>%
  arrange(explained) %>%
  mutate(effect = factor(effect, levels = effect))

ggplot(data    = df.fig1d,
       mapping = aes(x = effect, y = explained)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = signif(explained, digits = 3)),
            nudge_y   = 0.01,
            size      = 3) +
  labs(x = NULL, y = "Proportion of the variance explained") + 
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90,
                                   vjust = 0.5,
                                   hjust = 1))

Perform Sample Level Enrichment Analysis on hallmark+BTM genesets

Author: Slim Fourati

Pre-Processing: Microarray probes may have different affinities for their target mRNA and thus the itensities can’t be compared between probes inside the same sample. A way to correct for that is to scale each probe to a mean of 0 and a standard-deviation of 1.

The following looks only at pre-vaccination timepoints

  sleaFile <- file.path(dataCacheDir, "sleaSet.rds")
  sleaCached <- file.exists(sleaFile)
  if(sleaCached){
    sleaSet <- readRDS(sleaFile)
  }
batch <- interaction(young.NoResp$study_accession,
                     young.NoResp$matrix,
                     drop = TRUE)
scaledMat <- by(data = t(exprs(young.NoResp)),
                INDICES = batch,
                FUN = function(x) scale(x)) %>%
  do.call(what = rbind) %>%
  t()
scaledMat <- scaledMat[, sampleNames(young.NoResp)]

# SLEA (Ref: Lopez-Bigas N. et al. (2008) Genome Biol.)
B <- 100
flag <- lapply(all_genesets, FUN = intersect, y = rownames(scaledMat)) %>%
  sapply(FUN = length)
zscoreMat <- sapply(all_genesets, FUN = function(gs) {
  gs <- intersect(gs, rownames(scaledMat))
  ngenes <- length(gs)
  mu <- colMeans(scaledMat[gs, , drop = FALSE], na.rm = TRUE)
  muPermut <- mclapply(1:B, FUN = function(seed) {
    set.seed(seed = seed)
    muHat <- colMeans(scaledMat[sample.int(nrow(scaledMat), ngenes), , drop = FALSE],
                      na.rm = TRUE)
    return(value = muHat)
  })
  muPermut <- do.call(what = rbind, args = muPermut)
  zscore <- (mu - colMeans(muPermut, na.rm = TRUE)) /
    apply(muPermut, MARGIN = 2, FUN = sd, na.rm = TRUE)
  return(value = zscore)
})
sleaSet <- ExpressionSet(assayData = t(zscoreMat),
                         phenoData = AnnotatedDataFrame(pData(young.NoResp)))
saveRDS(sleaSet, file = sleaFile)
inflamGS <- c("HALLMARK_INFLAMMATORY_RESPONSE",
              "HALLMARK_COMPLEMENT",
              "HALLMARK_IL6_JAK_STAT3_SIGNALING",
              "HALLMARK_TNFA_SIGNALING_VIA_NFKB")

Figure 2: Heatmap of SLEA zscore on pre-vaccination samples

Author: Slim Fourati

zscoreTemp <- exprs(sleaSet)[, sleaSet$study_time_collected <= 0]

# remove genesets with missing symbols in virtual study
zscoreTemp <- zscoreTemp[rowMeans(is.na(zscoreTemp)) < 1, ]

# for representation purpsis only present top 200 varying genesets
varLS <- apply(zscoreTemp, MARGIN = 1, FUN = var)
zscoreTemp <- zscoreTemp[order(varLS, decreasing = TRUE)[1:200], ]

# cutree into three groups (see gap statistic analysis
set.seed(seed = 23)
tree_col  <- kmeans(x = t(zscoreTemp), centers = 3)
gr <- tree_col$cluster

# relabel cluster based on inflammatory genesets
df.fig2 <- data.frame(mu = colMeans(zscoreTemp[inflamGS, ])) %>%
  rownames_to_column()

df.fig2 <- data.frame(grn = gr) %>%
  rownames_to_column() %>%
  merge(x = df.fig2) %>%
  merge(y = pData(sleaSet),
        by.x = "rowname",
        by.y = "uid")

df.fig2 %>%
  group_by(grn) %>%
  summarize(q2 = median(mu)) %>%
  ungroup() %>%
  mutate(gr = ifelse(q2 %in% min(q2),
                     yes = "low",
                     no  = NA),
         gr = ifelse(q2 %in% max(q2) & is.na(gr),
                     yes = "high",
                     no  = gr),
         gr = ifelse(is.na(gr),
                     yes = "mid",
                     no  = gr),
         gr = factor(gr, levels = c("low", "mid", "high"))) %>%
  merge(x = df.fig2,
        by = "grn") %>%
  column_to_rownames(var = "rowname") -> df.fig2

df.fig2 <- df.fig2[colnames(zscoreTemp), ]

# Remove names of unwanted cells in plot
rowLabels <- rownames(zscoreTemp)

targetCellNames <- "B.cell|E2F|T.cell|MYC| NK| Interferon|STAT|migration|Monocytes| DC|Neutrophiles"

unwantedCells <- !grepl(pattern = targetCellNames, 
                        rownames(zscoreTemp),
                        ignore.case = TRUE)

rowLabels[ unwantedCells ] <- ""

# generate heatmap annotation
ha <- df.fig2 %>%
      rownames_to_column() %>%
      mutate(inflam.gs = findInterval(mu,
                                      vec = quantile(mu,
                                                             probs = c(0, 0.33, 0.66, 1)),
                                              rightmost.closed = TRUE),
             inflam.gs = paste0("tier", inflam.gs)) %>%
      select(rowname, inflam.gs) %>%
      column_to_rownames() %>%
      .[colnames(zscoreTemp), , drop = FALSE] %>%
      HeatmapAnnotation(df = ., 
                        col = list(inflam.gs = c(tier1 = "yellow",
                                                 tier2 = "orange",
                                                 tier3 = "red")))

Heatmap(matrix                      = zscoreTemp,
        row_names_side              = "left", 
        show_column_names           = FALSE,
        column_split                = df.fig2$gr,
        show_row_dend               = FALSE,
        name                        = "SLEA z-score",
        row_labels                  = rowLabels,
        row_names_gp                = gpar(fontsize = 8),
        top_annotation              = ha)

Figure s2

Figure s2a: Determine optimal number of cluster using the Gap statistic

Author: Slim Fourati

clusGapFile <- file.path(dataCacheDir, "clusGap.rds")
if(!file.exists(clusGapFile)){
  set.seed(seed = 23)
  fit <- clusGap(t(zscoreTemp), kmeans, K.max = 10)
  saveRDS(fit, file = clusGapFile)
}else{
  fit <- readRDS(clusGapFile)
}

plot(fit)

print(fit, method = "firstSEmax")
## Clustering Gap statistic ["clusGap"] from call:
## clusGap(x = t(zscoreTemp), FUNcluster = kmeans, K.max = 10)
## B=100 simulated reference sets, k = 1..10; spaceH0="scaledPCA"
##  --> Number of clusters (method 'firstSEmax', SE.factor=1): 1
##           logW   E.logW       gap      SE.sim
##  [1,] 9.092772 9.773302 0.6805300 0.005232080
##  [2,] 8.983406 9.661745 0.6783390 0.003892934
##  [3,] 8.935179 9.626523 0.6913443 0.004567634
##  [4,] 8.901936 9.596598 0.6946614 0.004983855
##  [5,] 8.883434 9.578408 0.6949743 0.004300859
##  [6,] 8.870609 9.563215 0.6926062 0.004140528
##  [7,] 8.849370 9.550025 0.7006544 0.003918490
##  [8,] 8.839935 9.538047 0.6981121 0.004245120
##  [9,] 8.828216 9.526725 0.6985095 0.003917492
## [10,] 8.815861 9.516875 0.7010137 0.003458210

Assess association with clinical variable

# continuous variable
df.fig2 %>%
  select(gr,
         age_reported,
         age_imputed) %>%
  pivot_longer(cols = -gr) %>%
  group_by(name) %>%
  do(p = kruskal.test(formula = value~gr,
                      data    = .)$p.value) %>%
  ungroup() %>%
  mutate(p = unlist(p))
# categorical variable
df.fig2 %>%
   select(gr,
          study_time_collected,
          gender, # TODO: Use ychrom_present? 
          race,
          ethnicity,
          exposure_material_reported,
          matrix,
          study_accession,
          Hispanic,
          White,
          Asian,
          Black,
          cell_type,
          cohort,
          featureSetName,
          featureSetName2,
          featureSetVendor,
          vaccine,
          vaccine_type,
          adjuvant,
          pathogen) %>%
  sapply(FUN = as.character) %>%
  as.data.frame() %>%
  pivot_longer(cols = -gr) %>%
  group_by(name) %>%
  do(p = {
    fit <- try(fisher.test(table(.$value, .$gr),
                           simulate.p.value = TRUE),
               silent = TRUE)
    if ("try-error" %in% class(fit)) {
      NA
    } else {
      fit$p.value
    }
  }) %>%
  ungroup() %>%
  mutate(p = unlist(p), adj.p = p.adjust(p, method = "BH"))

Figure s2b: Stability of the signature over time

Author: Slim Fourati

df.fig2sb <- df.fig2 %>%
            filter(mu < 4.5 & mu > -4.5) %>% # remove outlier
            filter(duplicated(participant_id) |
                   duplicated(participant_id, fromLast = TRUE))

df.fig2sb %>%
  filter(study_accession %in% c("SDY80", "SDY180")) %>%
  arrange(participant_id, study_time_collected) %>%
  wilcox.test(formula = mu ~ study_time_collected,
              data    = .,
              paired  = TRUE)
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  mu by study_time_collected
## V = 716, p-value = 0.00831
## alternative hypothesis: true location shift is not equal to 0
df.fig2sb %>%
  filter(study_accession %in% c("SDY80", "SDY180")) %>%                            
  select(participant_id, gr, mu, study_time_collected) %>%
  pivot_wider(names_from  = study_time_collected,
              values_from = c(gr, mu))  %>%
  mutate(stable = (`gr_-7` == `gr_0`) | abs(`mu_0`-`mu_-7`)< 1) %>%
  group_by(stable) %>%
  summarize(n = n())
df.fig2sb %>%
  filter(study_accession %in% c("SDY80", "SDY180")) %>%                            
  select(participant_id, gr, mu, study_time_collected) %>%
  pivot_wider(names_from  = study_time_collected,
              values_from = c(gr, mu))  %>%
  mutate(stable = (`gr_-7` == `gr_0`) | abs(`mu_0`-`mu_-7`)< 1) %>%
  select(participant_id, stable) %>%
  merge(x = df.fig2sb, by = "participant_id", all.x = TRUE) -> df.fig2sb

ggplot(data = df.fig2sb,
        mapping = aes(x = factor(study_time_collected), y = mu)) +
        geom_point(mapping = aes(color = gr)) +
        geom_line(mapping = aes(group = participant_id,
                                linetype  = stable)) +
        scale_color_discrete(name = "pre-vax cluster") +
        labs(x = "study_time_collected",
             y = "Inflammatory genesets (SLEA z-score)") +
        theme_bw()

Figure 3

Figure 3 Helper Functions

getSleaPDSubset.byGS <- function(geneset){
  pd <- pData(sleaSet)
  pd$mu <- colMeans(exprs(sleaSet)[geneset, ])
  pd <- pd[ !is.na(pd$gr), ]
}

getSleaPDSubset.byGenes <- function(genes){
  df <- exprs(young.NoResp)[genes, ] %>%
          as.data.frame() %>%
          rownames_to_column(var = "gene") %>%
          pivot_longer(cols = -gene, names_to = "uid") %>%
          merge(y = pData(sleaSet), by = "uid") %>%
          filter(!is.na(gr))
}

plotSLEASubsetByTime.byGS <- function(pd, ylabel){
  ggplot(data = pd,
          mapping = aes(x = study_time_collected, y = mu, color = gr)) +
          geom_line(mapping = aes(group = participant_id),
                    alpha = 0.1) +
          geom_hline(yintercept = 0, color = "grey", size = 2) +
          geom_smooth(method = "loess", formula = "y~x", color = "black") +
          facet_wrap(facets = ~gr) +
          scale_x_continuous(limits = c(0, 28)) +
          labs(y = ylabel,
               x = "Time after vaccination (days)") +
          theme_bw() +
          theme(axis.text    = element_text(size = 15),
                strip.text   = element_text(size = 20),
                axis.title.x = element_text(size = 20),
                legend.pos   = "none",
                panel.grid  = element_blank())
}

plotSLEASubsetByTime.byGene <- function(df){
  q2DF <- df %>%
          group_by(gene) %>%
          summarize(q2 = median(value))
  
  ggplot(data = df,
          mapping = aes(x = study_time_collected, y = value, color = gr)) +
          geom_line(mapping = aes(group = participant_id),
                    alpha = 0.1) +
          geom_hline(data    = q2DF,
                     mapping = aes(yintercept = q2),
                     color   = "grey") +
          geom_smooth(method = "loess", formula = "y~x", color = "black") +
          facet_grid(facets = gene~gr) +
          scale_x_continuous(limits = c(0, 28)) +
          scale_y_continuous(limits = c(7, 10.5)) +
          labs(y = "log2 intensity (ComBat)") +
          theme_bw() +
          theme(axis.text    = element_text(size = 15),
                strip.text   = element_text(size = 20),
                axis.title.x = element_text(size = 20),
                legend.pos   = "none",
                panel.grid  = element_blank())
}

Figure 3a: Plot SLEA z-score of inflammatory genesets over time

Author: Slim Fourati

pd.fig3a <- getSleaPDSubset.byGS(inflamGS)
plotSLEASubsetByTime.byGS(pd.fig3a, "Inflammatory pathways (SLEA z-score)")

Figure s3a: Plot canonical inflammatory genes over time

Author: Slim Fourati

inflamGenes <- c("IL1B", "NLRP3", "TNFAIP2")
df.figs3a <- getSleaPDSubset.byGenes(inflamGenes)
plotSLEASubsetByTime.byGene(df.figs3a)

Figure s3b: Plot SLEA z-score of interferon genesets over time

Author: Slim Fourati

interferonGS  <- c("HALLMARK_INTERFERON_ALPHA_RESPONSE",
                   "HALLMARK_INTERFERON_GAMMA_RESPONSE")
pd.figs3b <- getSleaPDSubset.byGS(interferonGS)
plotSLEASubsetByTime.byGS(pd.figs3b, "Interferon-stimulated genes (SLEA z-score)")

Figure s3b (cont): Plot canonical interferon-regulated genes over time

Author: Slim Fourati

isgGenes <- c("STAT2", "IRF9", "MX1")
df.figs3b2 <- getSleaPDSubset.byGenes(isgGenes)
plotSLEASubsetByTime.byGene(df.figs3b2)

Figure 3c: Plot SLEA z-score of B cell genesets over time

Author: Slim Fourati

bcellGS <- c("enriched in B cells (I) (M47.0)",
             "enriched in B cells (V) (M47.4)",
             "plasma cells & B cells, immunoglobulins (M156.0)")
pd.fig3c <- getSleaPDSubset.byGS(bcellGS)
plotSLEASubsetByTime.byGS(pd.fig3c, "B cells (SLEA z-score)")

Figure s3c: Plot canonical B cell markers over time

Author: Slim Fourati

bcellGenes <- c("CD79A", "CD79B", "BANK1")
df.figs3c <- getSleaPDSubset.byGenes(bcellGenes)
plotSLEASubsetByTime.byGene(df.figs3c)

Figure 4

Figure 4a: Plot MFC based on inflamDF

Author: Slim Fourati

pd.fig4a <- pData(sleaSet)[sleaSet$study_time_collected <= 0, ]

df.fig4a <- pd.fig4a %>%
              merge(y = distinct(select(pData(young.WithResp), 
                                        participant_id, MFC, MFC_p30))) %>%
                group_by(study_accession) %>%
                mutate(sMFC = scale(MFC),
                       vaccine = paste0(pathogen,".", vaccine_type),
                       vaccine = ifelse(pathogen %in% "Meningococcus",
                                        yes = "Meningococcus",
                                        no  = vaccine))

ggplot(data = df.fig4a,
       mapping = aes(x = factor(gr), y = sMFC)) +
       geom_beeswarm(cex = 1.1, size = 0.75) +
       geom_boxplot(outlier.color = "transparent", 
                    fill = "transparent", 
                    col = "red") +
       labs(x = "Prevax inflammatory cluster") +
       theme_bw()

kruskal.test(x = df.fig4a$sMFC, g = df.fig4a$gr)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df.fig4a$sMFC and df.fig4a$gr
## Kruskal-Wallis chi-squared = 3.0735, df = 2, p-value = 0.2151
pairwise.wilcox.test(x = df.fig4a$sMFC, 
                     g = df.fig4a$gr, 
                     p.adjust.method = "none") %>%
    print()
## 
##  Pairwise comparisons using Wilcoxon rank sum test 
## 
## data:  df.fig4a$sMFC and df.fig4a$gr 
## 
##      low   mid  
## mid  0.339 -    
## high 0.088 0.380
## 
## P value adjustment method: none

Table s4: Difference in MFC between low and high inflam. group per vaccine

Author: Slim Fourati

df.fig4a %>%
  filter(age_imputed < 50 & gr %in% c("low", "high")) %>%
  group_by(vaccine) %>%
  do(n = nrow(.),
    estimate = {
        fit <- try(wilcox.test(formula = sMFC~gr, 
                               data = ., 
                               conf.int = TRUE),
                   silent = TRUE)
        if ("try-error" %in% class(fit)) {
          NA
        } else {
          fit$estimate
        }
    },
    p = {
        fit <- try(wilcox.test(formula = sMFC~gr, data = ., conf.int = TRUE),
                 silent = TRUE)
      if ("try-error" %in% class(fit)) {
        NA
      } else {
        fit$p.value
      }
    }) %>%
    mutate(n = unlist(n),
           estimate = unlist(estimate), 
           p = unlist(p)) %>%
    print()
## # A tibble: 9 x 4
## # Rowwise: 
##   vaccine                                   n   estimate      p
##   <chr>                                 <int>      <dbl>  <dbl>
## 1 Hepatitis B.Inactivated                  14 -0.0000656 0.940 
## 2 Influenza.Inactivated                   365 -0.226     0.0720
## 3 Influenza.Live attenuated                21 -0.0000207 0.373 
## 4 Meningococcus                            22 -0.0579    0.794 
## 5 Pneumococcus.Polysaccharide               6  0.695     0.0902
## 6 Smallpox.Live attenuated                  3  1.22      0.667 
## 7 Tuberculosis.Recombinant Viral Vector     8  0.121     1     
## 8 Varicella Zoster.Live attenuated         14 -0.171     0.606 
## 9 Yellow Fever.Live attenuated             65 -0.0152    0.703

Figure 4-split: Split by Influenza.Inactivated or others

Author: Slim Fourati

plotTemp <- filter(df.fig4a, !(vaccine %in% c("Pneumococcus.Polysaccharide",
                                              "Smallpox.Live attenuated",
                                              "Tuberculosis.Recombinant Viral Vector")))

ggplot(data = filter(plotTemp, age_imputed < 50),
        mapping = aes(x = factor(gr), y = sMFC)) +
        geom_beeswarm() +
        geom_boxplot(outlier.color = "transparent", 
                     fill = "transparent", 
                     col = "red") +
        labs(x = "Prevax inflammatory cluster", 
             title = "age < 50") +
        facet_wrap(facets = ~ifelse(test= vaccine %in% "Influenza.Inactivated",
                                    yes = "Influenza.Inactivated",
                                    no = "others")) +
        theme_bw()

filter(plotTemp, vaccine %in% "Influenza.Inactivated") %>%
  (function(p) { pairwise.wilcox.test(x = p$sMFC,
                                      g = p$gr,
                                      p.adjust.method = "none")}) %>%
  print()
## 
##  Pairwise comparisons using Wilcoxon rank sum test 
## 
## data:  p$sMFC and p$gr 
## 
##      low   mid  
## mid  0.982 -    
## high 0.072 0.087
## 
## P value adjustment method: none
filter(plotTemp, vaccine != "Influenza.Inactivated") %>%
  (function(p) { pairwise.wilcox.test(x = p$sMFC,
                                      g = p$gr,
                                      p.adjust.method = "none")}) %>%
  print()
## 
##  Pairwise comparisons using Wilcoxon rank sum test 
## 
## data:  p$sMFC and p$gr 
## 
##      low   mid  
## mid  0.036 -    
## high 0.182 0.431
## 
## P value adjustment method: none